import { Tiktoken } from "js-tiktoken/lite";
import o200k_base from "js-tiktoken/ranks/o200k_base"
import fs from "fs";
import { transformString, extractFieldAndUnit, fileName } from './utils.js'


// Script: read ./stock/<fileName>.txt, run it through the project transform
// helpers, write the results to disk, and report the token count of the text
// before and after the transformation (o200k_base encoding).

// Raw input text; `fileName` is supplied by ./utils.js.
const data = fs.readFileSync(`./stock/${fileName}.txt`, 'utf8');
const tokenizer = new Tiktoken(o200k_base);

// NOTE(review): transformString / extractFieldAndUnit are defined in ./utils.js
// and not visible here; presumably they clean the text and pull field/unit
// information out into `map` — confirm against utils.js.
const { modifiedJsonString, map } = extractFieldAndUnit(transformString(data));

// Persist the extracted map and the transformed text next to the input file.
fs.writeFileSync('./stock/unit.json', JSON.stringify(map));
fs.writeFileSync(`./stock/${fileName}.transform.json`, modifiedJsonString);

// Count tokens of the transformed output.
const tokens = tokenizer.encode(modifiedJsonString);

// Report sizes in units of 1024 tokens. The first line is the raw input
// ("洗前" = before cleaning); the second is the transformed output
// ("洗后" = after cleaning). The second label previously repeated "洗前",
// which made the two figures indistinguishable in the console output.
console.log(`洗前: ${tokenizer.encode(data).length / 1024}k`);
console.log(`洗后: ${tokens.length / 1024}k`);


